From 705ea2b54c35ca51c24dee51e4d88ffb8a7dc889 Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Sat, 25 Feb 2006 21:28:27 +0100 Subject: [PATCH] New VCPUOP_register_runstate_memory_area hypercall. Avoids need for a hypercall in the guest timer interrupt handler. Cleaned up stolen/blocked tick handling in Linux. Signed-off-by: Keir Fraser --- .../arch/i386/kernel/time-xen.c | 89 ++++++++++--------- xen/arch/x86/domain.c | 5 ++ xen/common/domain.c | 22 +++++ xen/include/public/vcpu.h | 23 ++++- xen/include/xen/sched.h | 1 + 5 files changed, 99 insertions(+), 41 deletions(-) diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c index 8a82b9ca61..864bf17daf 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c @@ -130,6 +130,9 @@ static DEFINE_PER_CPU(u64, processed_system_time); static DEFINE_PER_CPU(u64, processed_stolen_time); static DEFINE_PER_CPU(u64, processed_blocked_time); +/* Current runstate of each CPU (updated automatically by the hypervisor). */ +static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); + /* Must be signed, as it's compared with s64 quantities which can be -ve. */ #define NS_PER_TICK (1000000000LL/HZ) @@ -575,19 +578,36 @@ EXPORT_SYMBOL(profile_pc); irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { s64 delta, delta_cpu, stolen, blocked; + u64 sched_time; int i, cpu = smp_processor_id(); struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); - struct vcpu_runstate_info runstate; + struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu); write_seqlock(&xtime_lock); do { get_time_values_from_xen(); + /* Obtain a consistent snapshot of elapsed wallclock cycles. */ delta = delta_cpu = shadow->system_timestamp + get_nsec_offset(shadow); delta -= processed_system_time; delta_cpu -= per_cpu(processed_system_time, cpu); + + /* + * Obtain a consistent snapshot of stolen/blocked cycles. We + * can use state_entry_time to detect if we get preempted here. + */ + do { + sched_time = runstate->state_entry_time; + barrier(); + stolen = runstate->time[RUNSTATE_runnable] + + runstate->time[RUNSTATE_offline] - + per_cpu(processed_stolen_time, cpu); + blocked = runstate->time[RUNSTATE_blocked] - + per_cpu(processed_blocked_time, cpu); + barrier(); + } while (sched_time != runstate->state_entry_time); } while (!time_values_up_to_date(cpu)); @@ -619,60 +639,44 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) write_sequnlock(&xtime_lock); - /* Obtain stolen/blocked cycles, if the hypervisor supports it. */ - if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, - cpu, &runstate) == 0) { - /* - * Account stolen ticks. - * HACK: Passing NULL to account_steal_time() - * ensures that the ticks are accounted as stolen. - */ - stolen = runstate.time[RUNSTATE_runnable] + - runstate.time[RUNSTATE_offline] - - per_cpu(processed_stolen_time, cpu); - if (unlikely(stolen < 0)) /* clock jitter */ - stolen = 0; + /* + * Account stolen ticks. + * HACK: Passing NULL to account_steal_time() + * ensures that the ticks are accounted as stolen. + */ + if (stolen > 0) { delta_cpu -= stolen; - if (unlikely(delta_cpu < 0)) { - stolen += delta_cpu; - delta_cpu = 0; - } do_div(stolen, NS_PER_TICK); per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK; + per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK; account_steal_time(NULL, (cputime_t)stolen); + } - /* - * Account blocked ticks. - * HACK: Passing idle_task to account_steal_time() - * ensures that the ticks are accounted as idle/wait. - */ - blocked = runstate.time[RUNSTATE_blocked] - - per_cpu(processed_blocked_time, cpu); - if (unlikely(blocked < 0)) /* clock jitter */ - blocked = 0; + /* + * Account blocked ticks. + * HACK: Passing idle_task to account_steal_time() + * ensures that the ticks are accounted as idle/wait. + */ + if (blocked > 0) { delta_cpu -= blocked; - if (unlikely(delta_cpu < 0)) { - blocked += delta_cpu; - delta_cpu = 0; - } do_div(blocked, NS_PER_TICK); per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK; + per_cpu(processed_system_time, cpu) += blocked * NS_PER_TICK; account_steal_time(idle_task(cpu), (cputime_t)blocked); - - per_cpu(processed_system_time, cpu) += - (stolen + blocked) * NS_PER_TICK; } + /* Account user/system ticks. */ if (delta_cpu > 0) { do_div(delta_cpu, NS_PER_TICK); + per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK; if (user_mode(regs)) account_user_time(current, (cputime_t)delta_cpu); else account_system_time(current, HARDIRQ_OFFSET, (cputime_t)delta_cpu); - per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK; } + /* Local timer processing (see update_process_times()). */ run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_mode(regs)); @@ -684,14 +688,19 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) static void init_missing_ticks_accounting(int cpu) { - struct vcpu_runstate_info runstate = { 0 }; + struct vcpu_register_runstate_memory_area area; + struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu); + + memset(runstate, 0, sizeof(*runstate)); - HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, cpu, &runstate); + area.addr.v = runstate; + HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area); - per_cpu(processed_blocked_time, cpu) = runstate.time[RUNSTATE_blocked]; + per_cpu(processed_blocked_time, cpu) = + runstate->time[RUNSTATE_blocked]; per_cpu(processed_stolen_time, cpu) = - runstate.time[RUNSTATE_runnable] + - runstate.time[RUNSTATE_offline]; + runstate->time[RUNSTATE_runnable] + + runstate->time[RUNSTATE_offline]; } /* not static: needed by APM */ diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 1cec5b9aa0..4f7da5a96c 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -784,6 +784,11 @@ void context_switch(struct vcpu *prev, struct vcpu *next) context_saved(prev); + /* Update per-VCPU guest runstate shared memory area (if registered). */ + if ( next->runstate_guest != NULL ) + __copy_to_user(next->runstate_guest, &next->runstate, + sizeof(next->runstate)); + schedule_tail(next); BUG(); } diff --git a/xen/common/domain.c b/xen/common/domain.c index 598d7e1b69..b80d8398e4 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -461,6 +461,28 @@ long do_vcpu_op(int cmd, int vcpuid, void *arg) break; } + case VCPUOP_register_runstate_memory_area: + { + struct vcpu_register_runstate_memory_area area; + + rc = -EINVAL; + if ( v != current ) + break; + + rc = -EFAULT; + if ( copy_from_user(&area, arg, sizeof(area)) ) + break; + + if ( !access_ok(area.addr.v, sizeof(*area.addr.v)) ) + break; + + rc = 0; + v->runstate_guest = area.addr.v; + __copy_to_user(v->runstate_guest, &v->runstate, sizeof(v->runstate)); + + break; + } + default: rc = -ENOSYS; break; diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h index 8a425b57da..1c36f81655 100644 --- a/xen/include/public/vcpu.h +++ b/xen/include/public/vcpu.h @@ -53,7 +53,7 @@ /* * Return information about the state and running time of a VCPU. - * @extra_arg == pointer to xen_vcpu_info structure. + * @extra_arg == pointer to vcpu_runstate_info structure. */ #define VCPUOP_get_runstate_info 4 typedef struct vcpu_runstate_info { @@ -85,6 +85,27 @@ typedef struct vcpu_runstate_info { */ #define RUNSTATE_offline 3 +/* + * Register a shared memory area from which the guest may obtain its own + * runstate information without needing to execute a hypercall. + * Notes: + * 1. The registered address may be virtual or physical, depending on the + * platform. The virtual address should be registered on x86 systems. + * 2. Only one shared area may be registered per VCPU. The shared area is + * updated by the hypervisor each time the VCPU is scheduled. Thus + * runstate.state will always be RUNSTATE_running and + * runstate.state_entry_time will indicate the system time at which the + * VCPU was last scheduled to run. + * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. + */ +#define VCPUOP_register_runstate_memory_area 5 +typedef struct vcpu_register_runstate_memory_area { + union { + struct vcpu_runstate_info *v; + uint64_t p; + } addr; +} vcpu_register_runstate_memory_area_t; + #endif /* __XEN_PUBLIC_VCPU_H__ */ /* diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index f6ab18b31b..91f457702a 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -70,6 +70,7 @@ struct vcpu void *sched_priv; /* scheduler-specific data */ struct vcpu_runstate_info runstate; + struct vcpu_runstate_info *runstate_guest; /* guest address */ unsigned long vcpu_flags; -- 2.30.2